This file contains R code to model parking meter activity using various time series models:
For every model there are two versions which correspond with two datasets:
setwd("/Users/stephenreagin/Desktop/USD/ADS_506_final_project/final_project/datasets/")
library(forecast)
#install.packages('GGally')
library(fpp3)
library(tidyverse)
library(dplyr)
transactions_df <- read.csv("daily_total_transactions.csv")
transactions_df$date <- as.Date(transactions_df$date)
transactions_tsibble <- as_tsibble(transactions_df, index=date)
#training and validation data splits
train_transactions_tsibble <- transactions_tsibble |>
slice(0:(length(transactions_df$transactions)-90))
valid_transactions_tsibble <- transactions_tsibble |> slice(n()-89:0)
plot(x=transactions_tsibble$date, y=transactions_tsibble$transactions,
xlab="Date",ylab="Daily Transactions", main='Daily Transactions over Time (all days)')
######## remove holidays and Sundays
nonspecial_df <- subset(transactions_tsibble, (isSunday == 0) & (isHoliday == 0))
special_df <- subset(transactions_tsibble, (isSunday == 1) | (isHoliday == 1))
nonspecial_df$X <- 1:length(nonspecial_df$X)
nonspecial_df <- as_tsibble(nonspecial_df, index=date)
train_nonspecial_df <- nonspecial_df |> slice(0:(length(nonspecial_df$transactions)-90))
valid_nonspecial_df <- nonspecial_df |> slice(n()-89:0)
plot(x=nonspecial_df$date, y=nonspecial_df$transactions, xlab="Date",ylab="Daily Transactions",
main='Daily Transactions over Time (no Sundays or holidays)')
\(y_t = \beta_{0}+\beta_{1}{trend}+\beta_{2}season+\beta_{3}rain+\beta_{4}Tavg+\beta_{5}Tmin+\epsilon_{t}\)
#fit the model
fit_lm <- train_transactions_tsibble |> model(lm = TSLM(transactions ~ TAVG +
PRCP + TMIN + isSunday + isHoliday + trend() + season()))
report(fit_lm)
## Series: transactions
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -17515.5 -392.1 484.8 1164.8 15826.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.622e+04 7.253e+02 22.360 < 2e-16 ***
## TAVG 6.542e+01 7.058e+01 0.927 0.354191
## PRCP -5.896e+01 3.136e+01 -1.880 0.060399 .
## TMIN 1.437e+02 5.367e+01 2.677 0.007555 **
## isSunday -1.781e+04 3.828e+02 -46.515 < 2e-16 ***
## isHoliday -1.381e+04 5.190e+02 -26.608 < 2e-16 ***
## trend() -2.558e+00 3.794e-01 -6.743 2.72e-11 ***
## season()week2 8.892e+02 3.826e+02 2.324 0.020331 *
## season()week3 -7.695e+02 3.826e+02 -2.011 0.044621 *
## season()week4 NA NA NA NA
## season()week5 -1.328e+03 3.853e+02 -3.448 0.000591 ***
## season()week6 -5.042e+02 3.838e+02 -1.314 0.189284
## season()week7 -8.344e+01 3.833e+02 -0.218 0.827701
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3130 on 928 degrees of freedom
## Multiple R-squared: 0.8244, Adjusted R-squared: 0.8224
## F-statistic: 396.2 on 11 and 928 DF, p-value: < 2.22e-16
fit_lm |> gg_tsresiduals() + labs("Linear Regression training errors")
#forecast using validation data
fc_lm <- forecast(fit_lm, new_data = valid_transactions_tsibble)
#transactions_tsibble |> autoplot(transactions) + autolayer(fc_lm)
transactions_tsibble |> autoplot(transactions,color='darkgray') + autolayer(fc_lm, level=NULL) +
labs(title="Linear Regression 90-day forecast (training set 1)", xlab="Date",ylab="Transactions")
sum(fc_lm$.mean) / sum(valid_transactions_tsibble$transactions)
## [1] 0.9852133
accuracy(fit_lm)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm Training 3.30e-13 3110. 1545. NaN Inf 0.809 0.621 0.573
accuracy(fc_lm, valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm Test 206. 1670. 858. 39.4 84.9 NaN NaN 0.144
fit_lm <- nonspecial_df |> model(lm = TSLM(transactions ~ TAVG + PRCP + TMIN + TMAX +
trend() + season()))
report(fit_lm)
## Series: transactions
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -17570.8 -235.7 470.7 1161.4 4354.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16174.4499 722.7406 22.379 < 2e-16 ***
## TAVG -49.5142 180.0678 -0.275 0.783404
## PRCP -150.6594 40.1717 -3.750 0.000189 ***
## TMIN 198.3696 91.9152 2.158 0.031201 *
## TMAX 77.0887 85.2088 0.905 0.365886
## trend() -2.9777 0.3478 -8.561 < 2e-16 ***
## season()week2 763.8270 359.2533 2.126 0.033786 *
## season()week3 -879.2885 358.4847 -2.453 0.014381 *
## season()week4 NA NA NA NA
## season()week5 -1507.8476 367.7587 -4.100 4.54e-05 ***
## season()week6 -554.7763 355.7285 -1.560 0.119248
## season()week7 -171.1755 355.2703 -0.482 0.630063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3004 on 828 degrees of freedom
## Multiple R-squared: 0.2163, Adjusted R-squared: 0.2068
## F-statistic: 22.85 on 10 and 828 DF, p-value: < 2.22e-16
fc_lm <- forecast(fit_lm, new_data = valid_nonspecial_df)
nonspecial_df |> autoplot(transactions, color='darkgray') +
autolayer(fc_lm, level=NULL) +
labs(title="Linear Regression 90-day forecast (training set 2)", xlab="Date",ylab="Transactions")
sum(fc_lm$.mean) / sum(valid_nonspecial_df$transactions)
## [1] 0.992725
accuracy(fit_lm)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm Training 6.77e-13 2985. 1431. -Inf Inf 0.961 0.980 0.786
accuracy(fc_lm, valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm Test 124. 867. 612. 0.585 3.68 NaN NaN 0.427
For the following models, having gaps in the date will create errors. This is especially a problem for the dataset which includes No Sundays and No Holidays, because it has at least one gap every week. Therefore, I am going to create a dummy index and have the models use the dummy index as a proxy for the date.
#need to redefine tsibble objects to key on a dummy index
nonspecial_df <- subset(transactions_tsibble, (isSunday == 0) & (isHoliday == 0))
special_df <- subset(transactions_tsibble, (isSunday == 1) | (isHoliday == 1))
nonspecial_df$X <- 1:length(nonspecial_df$X)
nonspecial_df <- as_tsibble(nonspecial_df, index=X)
train_nonspecial_df <- nonspecial_df |> slice(0:(length(nonspecial_df$transactions)-90))
valid_nonspecial_df <- nonspecial_df |> slice(n()-89:0)
The (P,D,Q) values for the ARIMA models were automatically generated
by running
ARIMA(season_adjust, stepwise=FALSE, approx=FALSE) and
manually coding the results so as to reduce runtime in subsequent
notebook renderings.
fit <- train_transactions_tsibble |> model(ARIMA(transactions ~ pdq(0,0,2) + PDQ(0,1,1)))
report(fit)
## Series: transactions
## Model: ARIMA(0,0,2)(0,1,1)[7]
##
## Coefficients:
## ma1 ma2 sma1
## 0.3012 0.2211 -0.9206
## s.e. 0.0341 0.0295 0.0355
##
## sigma^2 estimated as 15390813: log likelihood=-9049.28
## AIC=18106.56 AICc=18106.6 BIC=18125.91
fit |> gg_tsresiduals() + labs("ARIMA training errors")
fc <- fit |> forecast(h=180)
transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="ARIMA 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.050678
accuracy(fit)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ p… Trai… 48.9 3902. 2107. NaN Inf 1.10 0.779 0.0345
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ pd… Test -707. 3039. 981. -149. 153. NaN NaN 0.117
fit <- train_nonspecial_df |> model(ARIMA(transactions ~ pdq(4,1,2)))
report(fit)
## Series: transactions
## Model: ARIMA(4,1,2)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2
## 0.3780 -0.7615 -0.2269 -0.2526 -0.7925 0.8152
## s.e. 0.0619 0.0548 0.0371 0.0391 0.0518 0.0616
##
## sigma^2 estimated as 4045882: log likelihood=-6748.47
## AIC=13510.94 AICc=13511.09 BIC=13543.26
fit |> gg_tsresiduals() + labs("ARIMA training errors")
fc <- fit |> forecast(h=180)
nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="ARIMA 180-day forecast (training set 2)", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.25884
accuracy(fit)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ … Trai… 15.8 2002. 984. NaN Inf 0.890 0.895 -0.0120
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(transactions ~ pd… Test -450. 1157. 887. -3.05 5.41 NaN NaN 0.438
ets_fit <- train_transactions_tsibble |> model(ETS(transactions))
report(ets_fit)
## Series: transactions
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.193608
## gamma = 0.000100066
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 13192.06 2139.278 3599.093 2860.265 3203.008 2823.13 222.9795 -14847.75
##
## sigma^2: 14130134
##
## AIC AICc BIC
## 21922.07 21922.31 21970.53
ets_fit |> gg_tsresiduals() + labs("ETS training errors")
ets_fit |> components() |> autoplot()
ets_fc <- ets_fit |> forecast(h=180)
fitted_ets <- ets_fit |> augment()
#training
accuracy(fitted_ets$.fitted, train_transactions_tsibble$transactions)
## ME RMSE MAE MPE MAPE
## Test set 12.78305 3740.969 1909.357 NaN Inf
#validation
accuracy(ets_fc$.mean[1:90], valid_transactions_tsibble$transactions)
## ME RMSE MAE MPE MAPE
## Test set -1569.208 3338.123 1624.55 -186.2008 186.5356
ets_fc |> autoplot(transactions_tsibble, level=NULL, color='red') +
ggtitle("ETS Model (ANA) 180-day forecast (training set 1)") +
geom_line(data=ets_fit |> augment(), aes(y=.fitted), color='blue', lty=2)
sum(ets_fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.112513
accuracy(ets_fit)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Training 12.8 3741. 1909. NaN Inf 0.999 0.746 0.0929
accuracy(ets_fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Test -1569. 3338. 1625. -186. 187. NaN NaN 0.120
ets_fit <- train_nonspecial_df |> model(ETS(transactions))
report(ets_fit)
## Series: transactions
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.4849877
##
## Initial states:
## l[0]
## 11301.53
##
## sigma^2: 4285722
##
## AIC AICc BIC
## 16399.26 16399.29 16413.12
ets_fit |> gg_tsresiduals() + labs("ETS training errors")
ets_fit |> components() |> autoplot()
ets_fc <- ets_fit |> forecast(h=180)
fitted_ets <- ets_fit |> augment()
ets_fc |> autoplot(nonspecial_df, level=NULL, color='red') +
ggtitle("ETS Model (ANN) 180-day forecast (training set 2") +
geom_line(data=ets_fit |> augment(), aes(y=.fitted), color='blue', lty=2)
sum(ets_fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.255849
accuracy(ets_fit)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Training 17.1 2067. 1133. -Inf Inf 1.02 0.925 0.0941
accuracy(ets_fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(transactions) Test -408. 1152. 879. -2.82 5.36 NaN NaN 0.441
transactions_tsibble %>% model(STL(transactions ~ season(7) +
season(365), robust=TRUE)) %>% components() |> autoplot()
The new weekly season is 6 days, and the annual season is 294 (365 - 52 Sundays - 19 holidays = 294)
nonspecial_df %>% model(STL(transactions ~ season(6) +
season(294), robust=TRUE)) %>% components() |> autoplot()
dcmp_spec <- decomposition_model(STL(transactions ~ season(7) + season(365), robust=TRUE),
ETS(season_adjust))
dcmp_ets <- train_transactions_tsibble |> model(dcmp_spec)
report(dcmp_ets)
## Series: transactions
## Model: STL decomposition model
## Combination: season_adjust + season_7 + season_365
##
## ==================================================
##
## Series: season_adjust + season_7
## Model: COMBINATION
## Combination: season_adjust + season_7
##
## =====================================
##
## Series: season_adjust
## Model: ETS(A,Ad,A)
## Smoothing parameters:
## alpha = 0.06896141
## beta = 0.0001000607
## gamma = 0.0001000321
## phi = 0.94292
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 9509.027 390.1202 -182.1924 -148.0245 -89.21192 62.65256 290.1151 -373.4501
## s[-6]
## 440.1113
##
## sigma^2: 3938723
##
## AIC AICc BIC
## 20724.24 20724.63 20787.23
##
## Series: season_7
## Model: SNAIVE
##
## sigma^2: 2169.2607
##
##
## Series: season_365
## Model: SNAIVE
##
## sigma^2: 14877112.024
dcmp_ets |> gg_tsresiduals() + labs("STL + ETS training errors")
fc <- dcmp_ets |> forecast(h=180)
transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="STL + ETS(A,Ad,A) 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions[1:90])
## [1] 0.9815249
accuracy(dcmp_ets)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -104. 4682. 1995. NaN Inf 1.04 0.934 0.223
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test 3418. 7518. 4072. 19.6 23.8 4.13 5.92 0.974
dcmp_spec <- decomposition_model(STL(transactions ~ season(6) + season(294), robust=TRUE),
ETS(season_adjust))
dcmp_ets <- train_nonspecial_df |> model(dcmp_spec)
report(dcmp_ets)
## Series: transactions
## Model: STL decomposition model
## Combination: season_adjust + season_6 + season_294
##
## ==================================================
##
## Series: season_adjust + season_6
## Model: COMBINATION
## Combination: season_adjust + season_6
##
## =====================================
##
## Series: season_adjust
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.1015276
## beta = 0.0001000067
## phi = 0.9715424
##
## Initial states:
## l[0] b[0]
## 15132.8 123.2774
##
## sigma^2: 1310069
##
## AIC AICc BIC
## 15514.53 15514.64 15542.24
##
## Series: season_6
## Model: SNAIVE
##
## sigma^2: 14929.3458
##
##
## Series: season_294
## Model: SNAIVE
##
## sigma^2: 9131833.0067
dcmp_ets |> gg_tsresiduals() + labs("STL + ETS training errors")
fc <- dcmp_ets |> forecast(h=180)
nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="STL + ETS(A,Ad,N) 180-day forecast (training set 2", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions[1:90])
## [1] 0.9717736
accuracy(dcmp_ets)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -125. 3401. 1458. -Inf Inf 1.32 1.52 0.715
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test 483. 1511. 1278. 2.44 7.50 NaN NaN 0.257
dcmp_spec <- decomposition_model(STL(transactions ~ season(7) + season(365), robust=TRUE),
ARIMA(season_adjust, stepwise=FALSE, approx=FALSE))
dcmp_arima <- train_transactions_tsibble |> model(dcmp_spec)
report(dcmp_arima)
## Series: transactions
## Model: STL decomposition model
## Combination: season_adjust + season_7 + season_365
##
## ==================================================
##
## Series: season_adjust + season_7
## Model: COMBINATION
## Combination: season_adjust + season_7
##
## =====================================
##
## Series: season_adjust
## Model: ARIMA(2,1,3)(0,0,1)[7]
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1
## 1.4282 -0.7676 -2.2692 2.0326 -0.7494 0.2246
## s.e. 0.0719 0.0579 0.0637 0.0962 0.0415 0.0392
##
## sigma^2 estimated as 3742733: log likelihood=-8436.57
## AIC=16887.15 AICc=16887.27 BIC=16921.06
##
## Series: season_7
## Model: SNAIVE
##
## sigma^2: 2169.2607
##
##
## Series: season_365
## Model: SNAIVE
##
## sigma^2: 14877112.024
dcmp_arima |> gg_tsresiduals() + labs("STL + ARIMA training errors")
fc <- dcmp_arima |> forecast(h=180)
transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="STL + ARIMA 180-day forecast (training set 1)", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 0.9818345
accuracy(dcmp_arima)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -128. 4646. 1979. NaN Inf 1.04 0.927 0.170
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test 253. 4204. 1641. -78.5 221. NaN NaN -0.429
dcmp_spec <- decomposition_model(STL(transactions ~ season(6) + season(294), robust=TRUE),
ARIMA(season_adjust, stepwise=FALSE, approx=FALSE))
dcmp_arima <- train_nonspecial_df |> model(dcmp_spec)
report(dcmp_arima)
## Series: transactions
## Model: STL decomposition model
## Combination: season_adjust + season_6 + season_294
##
## ==================================================
##
## Series: season_adjust + season_6
## Model: COMBINATION
## Combination: season_adjust + season_6
##
## =====================================
##
## Series: season_adjust
## Model: ARIMA(2,1,4)
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4
## -0.8860 -0.9418 0.2706 0.1389 -0.8081 -0.2986
## s.e. 0.0463 0.0310 0.0539 0.0410 0.0310 0.0352
##
## sigma^2 estimated as 1226299: log likelihood=-6302.51
## AIC=12619.02 AICc=12619.17 BIC=12651.34
##
## Series: season_6
## Model: SNAIVE
##
## sigma^2: 14929.3458
##
##
## Series: season_294
## Model: SNAIVE
##
## sigma^2: 9131833.0067
dcmp_arima |> gg_tsresiduals() + labs("STL + ARIMA training errors")
fc <- dcmp_arima |> forecast(h=180)
nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="STL + ARIMA 180-day forecast (training set 2)", xlab="Date",ylab="Transactions")
sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions)
## [1] 0.9717556
accuracy(dcmp_arima)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Training -122. 3351. 1440. -Inf Inf 1.30 1.50 0.690
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_spec Test 483. 1511. 1274. 2.44 7.48 NaN NaN 0.256
fit <- train_transactions_tsibble |> model(
mean = MEAN(transactions),
naive = NAIVE(transactions),
drift = RW(transactions ~ drift()))
fc <- fit |> forecast(h=180)
transactions_tsibble |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="Baseline 180-day forecasts (training set 1)", xlab="Date",ylab="Transactions")
accuracy(fit)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean Training -3.85e-12 7423. 5946. -Inf Inf 3.11 1.48 0.161
## 2 naive Training 1.76e+ 1 9608. 5898. -Inf Inf 3.09 1.92 -0.377
## 3 drift Training 8.33e-13 9608. 5894. -Inf Inf 3.09 1.92 -0.377
accuracy(fc[1:90,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean Test -809. 6427. 4338. -1036. 1057. NaN NaN -0.0134
sum(fc$.mean[1:90]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.057975
accuracy(fc[181:270,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 naive Test -2816. 6970. 3580. -1191. 1195. NaN NaN -0.0134
sum(fc$.mean[181:270]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.201911
accuracy(fc[361:450,], valid_transactions_tsibble)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift Test -3618. 7386. 3941. -1259. 1261. NaN NaN 0.00278
sum(fc$.mean[361:450]) / sum(valid_transactions_tsibble$transactions)
## [1] 1.259421
fit <- train_nonspecial_df |> model(
mean = MEAN(transactions),
naive = NAIVE(transactions),
drift = RW(transactions ~ drift()))
fc <- fit |> forecast(h=180)
nonspecial_df |> autoplot(transactions, color='darkgray') + autolayer(fc, level=NULL) +
labs(title="Baseline 180-day forecasts (training set 2)", xlab="Date",ylab="Transactions")
accuracy(fit)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean Training -5.70e-12 3539. 1803. -Inf Inf 1.63 1.58 0.798
## 2 naive Training 8.89e+ 0 2236. 1106. -Inf Inf 1 1 -0.288
## 3 drift Training 4.67e-13 2236. 1104. -Inf Inf 0.998 1.00 -0.288
accuracy(fc[1:90,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean Test -791. 1336. 1051. -5.06 6.45 NaN NaN 0.441
sum(fc$.mean[1:90]) / sum(valid_nonspecial_df$transactions)
## [1] 1.046237
accuracy(fc[181:270,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 naive Test -547. 1208. 937. -3.63 5.73 NaN NaN 0.441
sum(fc$.mean[181:270]) / sum(valid_nonspecial_df$transactions)
## [1] 1.03197
accuracy(fc[361:450,], valid_nonspecial_df)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift Test -951. 1530. 1217. -6.04 7.46 NaN NaN 0.537
sum(fc$.mean[361:450]) / sum(valid_nonspecial_df$transactions)
## [1] 1.055602
library(xts)
num_transactions.ts <- ts(transactions_df$transactions, start=c(2020,295), freq=365)
#creating time series vectors
total_transactions.xts <- xts(transactions_df$transactions, order.by = transactions_df$date)
isHoliday.xts <- xts(transactions_df$isHoliday, order.by = transactions_df$date)
precip.xts <- xts(transactions_df$PRCP, order.by = transactions_df$date)
tavg.xts <- xts(transactions_df$TAVG, order.by = transactions_df$date)
weekly_transactions.xts <- apply.weekly(total_transactions.xts, sum)
weekly_precip.xts <- apply.weekly(precip.xts, sum)
weekly_tavg.xts <- apply.weekly(tavg.xts, mean)
#weekly transactions with holidays
plot.xts(weekly_transactions.xts, main='Weekly Transactions over Time',ylim=c(0,150000))
lines(weekly_precip.xts * 1000 + 20000, col='blue', lty=2, lwd=1)
points(isHoliday.xts * 100000-1, pch = 20, col='purple')
addLegend("topleft",c('Weekly transactions', "Rainfall",'Holiday'), lty=c(1,2,3),
col=c('black','blue','purple'), lwd=c(1,1,2))
transactions_tsibble |> ggplot(aes(x=TAVG, y=transactions)) + geom_point() +
geom_smooth(method = 'lm', se=FALSE) + labs(title="Transactions as a function of Temperature (C)")
nonspecial_df |> ggplot(aes(x=TAVG, y=transactions)) + geom_point() +
geom_smooth(method = 'lm', se=FALSE) + labs(title = "Transactions as a Function of Temperature (Sundays & Holidays removed)")
train_transactions_tsibble |> ACF(difference(transactions), lag_max = 180) |> autoplot() +
labs(title = "ACF for Training Set 1")
train_transactions_tsibble |> autoplot(difference(transactions, lag=7)) +
labs(title = "Training Set 1, lag-7 difference")
train_nonspecial_df |> ACF(difference(transactions), lag_max = 180) |> autoplot() +
labs(title = "ACF for Training Set 2")
train_nonspecial_df |> autoplot(difference(transactions, lag=1)) +
labs(title = "Training Set 1, lag-7 difference")